Training an Agent to play the game of Pong

Import necessary dependencies


In [16]:
import numpy as np
import gym

from neon.backends import gen_backend
from neon.backends import Autodiff
import random
import os

In [17]:
from IPython.display import Image
Image(filename='pong_architecture.jpg')


Out[17]:

In [18]:
be = gen_backend('cpu', batch_size = 128)
class Network:
    def __init__(self, D=80*80, H = 200, gamma = 0.99, restore_model = False):
        """
        D: No. of Image pixels
        H: No. of hidden units in first layer of Neural Network
        gamma: discount factor
        """
        self.gamma = gamma
        self.ll = {}
        self.learning_rate = 0.00001
        
        if restore_model and os.path.exists('model_weights.npy'):
            self.ll['W1'] = np.load('model_weights.npy').item()['W1']
            self.ll['W2'] = np.load('model_weights.npy').item()['W2']
        else:
            self.ll['W1'] = be.array(np.random.randn(H,D) / np.sqrt(D)) #random initialization of weight parameters followed by scaling
            self.ll['W2'] = be.array(np.random.randn(H,1) / np.sqrt(H))
        self.dW1 = be.array(np.zeros((H,D))) #random initialization of gradients
        self.dW2 = be.array(np.zeros((H,1)))
    
            
    #forward Propagation
    def policy_forward(self, x):
        # map visual input to the first hidden layer of a neural network
        
        h = be.dot(self.ll['W1'], be.array(x))
        h = be.sig(h)
        dlogp = be.dot(h.transpose(), self.ll['W2'])
        
        p = be.sig(dlogp)
        
        p_val = be.empty((1,1)) # Initialize an empty tensor of size 1X1
        h_val = be.empty((200,1))
        p_val[:] = p # Set values of the tensor to p
        h_val[:] = h
        return p_val.get(), h_val.get(), p, h
    
    #backward propagation
    def policy_backward(self, losses_op, episode_dlogps, episode_rewards):
        
        discounted_rewards = self.discount_rewards(episode_rewards)
        
        # to reduce the variance of the gradient estimator and avoid potential vanishing problems
        discounted_rewards -= np.mean(discounted_rewards)
        discounted_rewards /= np.std(discounted_rewards)
        
        episode_dlogps *= discounted_rewards # Modulating gradients with discount factor 
        
        """
        Compute gradients using Neon Backend
        """
        for i in range(len(losses_op)):
            ad = Autodiff(op_tree=losses_op[i]*be.array(episode_dlogps[i]), be = be, next_error=None)
            # compute gradients and assign them to self.dw1 and self.dw2
            ad.back_prop_grad([self.ll['W2'], self.ll['W1']], [self.dW2, self.dW1])
            # weights update:
            self.ll['W2'][:] = self.ll['W2'].get() -self.learning_rate *self.dW2.get()/len(losses_op)
            self.ll['W1'][:] = self.ll['W1'].get() -self.learning_rate *self.dW1.get()/len(losses_op)
        return
    
    def sigmoid(self, x):
        return 1.0/ (1.0 + np.exp(-x))
    
    def get_loss(self, y_fake, up_probability):
        loss = y_fake - up_probability
        return loss
    
    # Reward per time step
    # Reward < 0 if agent missed the ball and hence lost the game
    # Reward > 0 if agent won the game
    # Reward = zero if game in progress
    # The agent receives rewards generated by the game and implements discounted reward backwards with exponential moving average. The agent is rewarded more in the earlier stages 

    
    def discount_rewards(self, r):
        discounted_r = np.zeros_like(r)
        running_add = 0
        for t in reversed(range(0, r.size)):
            # if reward at index t is nonzero, then there is a positive/negative reward. This also marks a game boundary
            # for the sequence of game_actions produced by the agent
            if r[t] != 0.0: running_add = 0.0 
            # moving average given discount factor gamma, it assigns more weight to recent game actions
            running_add = running_add * self.gamma + r[t]
            discounted_r[t] = running_add
        return discounted_r
    
    # Preprocess a single frame before feeding it to the model
    def prepro(self, I):
        """
        Dimensions of the Image 210x160x3
        We'll downsample the image into a 6400 (80x80) 1D float vector
        """
        I = I[35:195] # crop
        I = I[::2, ::2, 0] # downsaple by a factor of 2
        I[I == 144] = 0 # erase background type 1
        I[I == 109] = 0 # erase background type 2
        I[I!=0] = 1 # Everything else (paddles, ball) equals to 1
        return I.astype(np.float).ravel() # Flattens
    
    # Stochastic process to choose an action ( moving up ) proportional to its predicted probability
    # Probability of choosing the opposite action is (1 - probability_up)
    # action == 2, moving up
    # action == 3, moving down
    def sample_action(self, up_probability):
        stochastic_value = np.random.uniform()
        action = 2 if stochastic_value < up_probability else 3
        return action

In [19]:
render = False               # to visualize agent 
restore_model = True        # to load a trained model when available

random.seed(2017)

D = 80 * 80                 # number of pixels in input
H = 200                     # number of hidden layer neurons
# Game environment
env = gym.make("Pong-v0")
network = Network(D=D, H=H, restore_model=restore_model)

# Each time step, the agent chooses an action, and the environment returns an observation and a reward.
# The process gets started by calling reset, which returns an initial observation
observation = env.reset()
prev_x = None

# hidden state, gradient ops, gradient values, rewards
hs, losses_op, dlogps, rewards = [],[],[], []
running_reward = None       # current reward
reward_sum = 0.0            # sum rewards
episode_number = 0

game_actions = []
game_rewards = []
game_gradients = []


[2018-01-24 11:43:35,836] Making new env: Pong-v0

Training process

An action consists of a paddle moving UP/DOWN, which generates a reward +1 (for every win) and -1 for every lost game at the end of each game.

Before knowing the result of a game, the model gets a fake label via the stochastic process explained before. This is like tossing a coin to decide to accept the log probabilities of a neural network. An optimal set of actions will maximize the sum of rewards along the game. An import event is when the agent wins/losses a game. But what caused this outcome?. The algorithm decided to modulate the loss functions of the network with the positive or negative rewards obtained from the environment and assign more weight to earlier actions using a moving average scheme. This logic is implement in function policy_backward() of the Network class.


In [20]:
while True:
    cur_x = network.prepro(observation)
    x = cur_x - prev_x if prev_x is not None else np.zeros(D)
    prev_x = cur_x

    up_probability, h_value, p, h = network.policy_forward(x)
    action = network.sample_action(up_probability)                              

    # assign a fake label, this decreases uncertainty and
    # this is one of the beauties of Reinforcement Learning
    y_fake = 1 if action == 2 else 0     
    
    # loss function gets closer to assigned label, the smaller difference
    # between probabilities the better
    # store gradients: derivative(log(p(x|theta)))       
    dlogp = np.abs(y_fake - up_probability)    
    # loss value
    dlogps.append(dlogp) 
    # loss op
    losses_op.append(be.absolute(y_fake - p))
    
    if render:
        env.render()
    
    #action: 
    #    0: no movement
    #    1: no movement
    #    2: up
    #    3: down
    #    4: up
    #    5: down
    observation, reward, done, info = env.step(action)
    
    # modifying rewards to favor longer games and thus to increase number of
    # positive rewards.
    reward = 0.0 if reward == 0.0 else reward
    reward = 1.0*len(game_rewards) if reward!=0.0 and len(game_rewards)>80 else reward
    reward = -1.0*len(game_rewards) if reward!=0.0 and len(game_rewards)<=50 else reward

    rewards.append(reward)
    reward_sum += reward
    
    game_actions.append(action)
    game_rewards.append(reward)
    game_gradients.append(dlogp[0][0])

    # end of a game
    # Pong has either +1 or -1 as reward when game ends.
    if reward != 0:  
        message = "Episode %d: game finished." % (episode_number)
        if reward < 0:
            message += "\x1b[0;31;40m  (RL loses)\x1b[0m"
        elif reward > 0:
            message += "\x1b[0;32;40m  (RL wins)\x1b[0m"
        print(message)
        print('Game duration: %d steps | Sum rewards: %f | Sum errors: %f' %(len(game_actions), np.sum(game_rewards), np.sum(game_gradients)))
        print('------------------------------------')
        game_actions = []
        game_rewards = []
        game_gradients = []
        
    # to save model
    if (episode_number+1)%10==0:
        np.save('model_weights.npy', network.ll)
        
    # end of an episode (minibatch of games)
    if done:
        episode_number +=1
        dlogps = np.vstack(dlogps)
        rewards = np.vstack(rewards)
        
        network.policy_backward(losses_op, dlogps, rewards)
        mean_loss = np.sum([x * x for x in dlogps])
        running_reward = reward_sum if running_reward is None else running_reward * 0.99 + reward_sum * 0.01
        print('-----------------------------------------------')
        print('Episode %d has finished, time to backpropagate.' % (episode_number - 1))
        print('Total reward was %f Running_reward: %f Mean_loss: %f' % (reward_sum, running_reward, mean_loss))
        print('-----------------------------------------------')

        # reset game environment
        observation = env.reset()  
        reward_sum = 0
        prev_x = None        
        dlogps, rewards = [], []
        losses_op = []


Episode 0: game finished.  (RL wins)
Game duration: 91 steps | Sum rewards: 90.000000 | Sum errors: 45.376827
------------------------------------
Episode 0: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.996187
------------------------------------
Episode 0: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.053751
------------------------------------
Episode 0: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.348719
------------------------------------
Episode 0: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.202196
------------------------------------
Episode 0: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.652737
------------------------------------
Episode 0: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 25.071209
------------------------------------
Episode 0: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.907280
------------------------------------
Episode 0: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.019863
------------------------------------
Episode 0: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.315050
------------------------------------
Episode 0: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.609373
------------------------------------
Episode 0: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.527157
------------------------------------
Episode 0: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.099796
------------------------------------
Episode 0: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.752838
------------------------------------
Episode 0: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 23.975780
------------------------------------
Episode 0: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.881439
------------------------------------
Episode 0: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.041620
------------------------------------
Episode 0: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.130939
------------------------------------
Episode 0: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.476902
------------------------------------
Episode 0: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.810513
------------------------------------
Episode 0: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 25.000055
------------------------------------
-----------------------------------------------
Episode 0 has finished, time to backpropagate.
Total reward was -826.000000 Running_reward: -826.000000 Mean_loss: 256.480743
-----------------------------------------------
Episode 1: game finished.  (RL wins)
Game duration: 85 steps | Sum rewards: 84.000000 | Sum errors: 42.490810
------------------------------------
Episode 1: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.946438
------------------------------------
Episode 1: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.579847
------------------------------------
Episode 1: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.837622
------------------------------------
Episode 1: game finished.  (RL loses)
Game duration: 51 steps | Sum rewards: -50.000000 | Sum errors: 25.841375
------------------------------------
Episode 1: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.714645
------------------------------------
Episode 1: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.594490
------------------------------------
Episode 1: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.402008
------------------------------------
Episode 1: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.490541
------------------------------------
Episode 1: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.324638
------------------------------------
Episode 1: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 23.984392
------------------------------------
Episode 1: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.711149
------------------------------------
Episode 1: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.340893
------------------------------------
Episode 1: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 21.869722
------------------------------------
Episode 1: game finished.  (RL wins)
Game duration: 124 steps | Sum rewards: 123.000000 | Sum errors: 61.651245
------------------------------------
Episode 1: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.953751
------------------------------------
Episode 1: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.989508
------------------------------------
Episode 1: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.629377
------------------------------------
Episode 1: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.430344
------------------------------------
Episode 1: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.415470
------------------------------------
Episode 1: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.782013
------------------------------------
-----------------------------------------------
Episode 1 has finished, time to backpropagate.
Total reward was -671.000000 Running_reward: -824.450000 Mean_loss: 275.514496
-----------------------------------------------
Episode 2: game finished.  (RL wins)
Game duration: 84 steps | Sum rewards: 83.000000 | Sum errors: 41.880867
------------------------------------
Episode 2: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.116434
------------------------------------
Episode 2: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.337032
------------------------------------
Episode 2: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.946108
------------------------------------
Episode 2: game finished.  (RL wins)
Game duration: 130 steps | Sum rewards: 129.000000 | Sum errors: 65.125488
------------------------------------
Episode 2: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.011459
------------------------------------
Episode 2: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.082106
------------------------------------
Episode 2: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.222404
------------------------------------
Episode 2: game finished.  (RL loses)
Game duration: 51 steps | Sum rewards: -50.000000 | Sum errors: 25.595770
------------------------------------
Episode 2: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.552696
------------------------------------
Episode 2: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.311180
------------------------------------
Episode 2: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.464745
------------------------------------
Episode 2: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.826958
------------------------------------
Episode 2: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.400503
------------------------------------
Episode 2: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.002396
------------------------------------
Episode 2: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.433922
------------------------------------
Episode 2: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.065479
------------------------------------
Episode 2: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.739319
------------------------------------
Episode 2: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.059778
------------------------------------
Episode 2: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.343660
------------------------------------
Episode 2: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.025152
------------------------------------
-----------------------------------------------
Episode 2 has finished, time to backpropagate.
Total reward was -660.000000 Running_reward: -822.805500 Mean_loss: 274.657654
-----------------------------------------------
Episode 3: game finished.  (RL wins)
Game duration: 82 steps | Sum rewards: 81.000000 | Sum errors: 41.260201
------------------------------------
Episode 3: game finished.  (RL loses)
Game duration: 51 steps | Sum rewards: -50.000000 | Sum errors: 25.028727
------------------------------------
Episode 3: game finished.  (RL wins)
Game duration: 169 steps | Sum rewards: 168.000000 | Sum errors: 84.234009
------------------------------------
Episode 3: game finished.  (RL wins)
Game duration: 88 steps | Sum rewards: 87.000000 | Sum errors: 44.046509
------------------------------------
Episode 3: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.571581
------------------------------------
Episode 3: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.676413
------------------------------------
Episode 3: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.946070
------------------------------------
Episode 3: game finished.  (RL wins)
Game duration: 126 steps | Sum rewards: 125.000000 | Sum errors: 62.653095
------------------------------------
Episode 3: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 24.976618
------------------------------------
Episode 3: game finished.  (RL loses)
Game duration: 42 steps | Sum rewards: -41.000000 | Sum errors: 20.745537
------------------------------------
Episode 3: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.764835
------------------------------------
Episode 3: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.445992
------------------------------------
Episode 3: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.375484
------------------------------------
Episode 3: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.693470
------------------------------------
Episode 3: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.924984
------------------------------------
Episode 3: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.800072
------------------------------------
Episode 3: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 24.948545
------------------------------------
Episode 3: game finished.  (RL loses)
Game duration: 43 steps | Sum rewards: -42.000000 | Sum errors: 21.360586
------------------------------------
Episode 3: game finished.  (RL wins)
Game duration: 121 steps | Sum rewards: 120.000000 | Sum errors: 60.416634
------------------------------------
Episode 3: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.983295
------------------------------------
Episode 3: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.984755
------------------------------------
Episode 3: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.727913
------------------------------------
-----------------------------------------------
Episode 3 has finished, time to backpropagate.
Total reward was -204.000000 Running_reward: -816.617445 Mean_loss: 344.047241
-----------------------------------------------
Episode 4: game finished.  (RL wins)
Game duration: 84 steps | Sum rewards: 83.000000 | Sum errors: 42.459381
------------------------------------
Episode 4: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.242462
------------------------------------
Episode 4: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.742649
------------------------------------
Episode 4: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.034418
------------------------------------
Episode 4: game finished.  (RL wins)
Game duration: 124 steps | Sum rewards: 123.000000 | Sum errors: 62.226616
------------------------------------
Episode 4: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 24.791370
------------------------------------
Episode 4: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.055439
------------------------------------
Episode 4: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.170609
------------------------------------
Episode 4: game finished.  (RL wins)
Game duration: 124 steps | Sum rewards: 123.000000 | Sum errors: 62.066013
------------------------------------
Episode 4: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.156654
------------------------------------
Episode 4: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.570257
------------------------------------
Episode 4: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.926825
------------------------------------
Episode 4: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.315411
------------------------------------
Episode 4: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.676386
------------------------------------
Episode 4: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.479473
------------------------------------
Episode 4: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.161865
------------------------------------
Episode 4: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.345392
------------------------------------
Episode 4: game finished.  (RL loses)
Game duration: 43 steps | Sum rewards: -42.000000 | Sum errors: 21.423512
------------------------------------
Episode 4: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.807337
------------------------------------
Episode 4: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.020432
------------------------------------
Episode 4: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.559456
------------------------------------
-----------------------------------------------
Episode 4 has finished, time to backpropagate.
Total reward was -484.000000 Running_reward: -813.291271 Mean_loss: 291.025818
-----------------------------------------------
Episode 5: game finished.  (RL wins)
Game duration: 88 steps | Sum rewards: 87.000000 | Sum errors: 43.472973
------------------------------------
Episode 5: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.702751
------------------------------------
Episode 5: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.558153
------------------------------------
Episode 5: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.809324
------------------------------------
Episode 5: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.125082
------------------------------------
Episode 5: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.810040
------------------------------------
Episode 5: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.349684
------------------------------------
Episode 5: game finished.  (RL wins)
Game duration: 125 steps | Sum rewards: 124.000000 | Sum errors: 63.045841
------------------------------------
Episode 5: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.786789
------------------------------------
Episode 5: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.908707
------------------------------------
Episode 5: game finished.  (RL loses)
Game duration: 43 steps | Sum rewards: -42.000000 | Sum errors: 21.560440
------------------------------------
Episode 5: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.806335
------------------------------------
Episode 5: game finished.  (RL wins)
Game duration: 87 steps | Sum rewards: 86.000000 | Sum errors: 42.761303
------------------------------------
Episode 5: game finished.  (RL wins)
Game duration: 85 steps | Sum rewards: 84.000000 | Sum errors: 42.315704
------------------------------------
Episode 5: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.661423
------------------------------------
Episode 5: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.530340
------------------------------------
Episode 5: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.568426
------------------------------------
Episode 5: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.687080
------------------------------------
Episode 5: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.376160
------------------------------------
Episode 5: game finished.  (RL wins)
Game duration: 128 steps | Sum rewards: 127.000000 | Sum errors: 63.428139
------------------------------------
Episode 5: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.777283
------------------------------------
Episode 5: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.455549
------------------------------------
-----------------------------------------------
Episode 5 has finished, time to backpropagate.
Total reward was -264.000000 Running_reward: -807.798358 Mean_loss: 323.508972
-----------------------------------------------
Episode 6: game finished.  (RL wins)
Game duration: 87 steps | Sum rewards: 86.000000 | Sum errors: 42.932972
------------------------------------
Episode 6: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.100105
------------------------------------
Episode 6: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.795040
------------------------------------
Episode 6: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.711660
------------------------------------
Episode 6: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.696043
------------------------------------
Episode 6: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.171198
------------------------------------
Episode 6: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.145201
------------------------------------
Episode 6: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.274719
------------------------------------
Episode 6: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.880589
------------------------------------
Episode 6: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.936844
------------------------------------
Episode 6: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.659372
------------------------------------
Episode 6: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 24.758095
------------------------------------
Episode 6: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.216806
------------------------------------
Episode 6: game finished.  (RL loses)
Game duration: 40 steps | Sum rewards: -39.000000 | Sum errors: 20.460245
------------------------------------
Episode 6: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.552603
------------------------------------
Episode 6: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.339682
------------------------------------
Episode 6: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.055122
------------------------------------
Episode 6: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.981815
------------------------------------
Episode 6: game finished.  (RL loses)
Game duration: 51 steps | Sum rewards: -50.000000 | Sum errors: 25.182261
------------------------------------
Episode 6: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.322472
------------------------------------
Episode 6: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.914934
------------------------------------
-----------------------------------------------
Episode 6 has finished, time to backpropagate.
Total reward was -823.000000 Running_reward: -807.950374 Mean_loss: 248.764359
-----------------------------------------------
Episode 7: game finished.  (RL wins)
Game duration: 87 steps | Sum rewards: 86.000000 | Sum errors: 43.650337
------------------------------------
Episode 7: game finished.  (RL wins)
Game duration: 128 steps | Sum rewards: 127.000000 | Sum errors: 63.959682
------------------------------------
Episode 7: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.011723
------------------------------------
Episode 7: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.339840
------------------------------------
Episode 7: game finished.  (RL wins)
Game duration: 86 steps | Sum rewards: 85.000000 | Sum errors: 42.359077
------------------------------------
Episode 7: game finished.  (RL wins)
Game duration: 91 steps | Sum rewards: 90.000000 | Sum errors: 45.743198
------------------------------------
Episode 7: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.950720
------------------------------------
Episode 7: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 24.626789
------------------------------------
Episode 7: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.001173
------------------------------------
Episode 7: game finished.  (RL wins)
Game duration: 86 steps | Sum rewards: 85.000000 | Sum errors: 42.857857
------------------------------------
Episode 7: game finished.  (RL wins)
Game duration: 93 steps | Sum rewards: 92.000000 | Sum errors: 46.074448
------------------------------------
Episode 7: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.088453
------------------------------------
Episode 7: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.931339
------------------------------------
Episode 7: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.323685
------------------------------------
Episode 7: game finished.  (RL wins)
Game duration: 122 steps | Sum rewards: 121.000000 | Sum errors: 60.421886
------------------------------------
Episode 7: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.506107
------------------------------------
Episode 7: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.336601
------------------------------------
Episode 7: game finished.  (RL loses)
Game duration: 42 steps | Sum rewards: -41.000000 | Sum errors: 21.135885
------------------------------------
Episode 7: game finished.  (RL wins)
Game duration: 130 steps | Sum rewards: 129.000000 | Sum errors: 64.822479
------------------------------------
Episode 7: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.627392
------------------------------------
Episode 7: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.851404
------------------------------------
Episode 7: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 24.677702
------------------------------------
Episode 7: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 24.790096
------------------------------------
-----------------------------------------------
Episode 7 has finished, time to backpropagate.
Total reward was 118.000000 Running_reward: -798.690871 Mean_loss: 382.103516
-----------------------------------------------
Episode 8: game finished.  (RL wins)
Game duration: 131 steps | Sum rewards: 130.000000 | Sum errors: 64.738899
------------------------------------
Episode 8: game finished.  (RL wins)
Game duration: 88 steps | Sum rewards: 87.000000 | Sum errors: 43.577827
------------------------------------
Episode 8: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.729614
------------------------------------
Episode 8: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.183626
------------------------------------
Episode 8: game finished.  (RL wins)
Game duration: 90 steps | Sum rewards: 89.000000 | Sum errors: 44.844486
------------------------------------
Episode 8: game finished.  (RL wins)
Game duration: 85 steps | Sum rewards: 84.000000 | Sum errors: 42.235703
------------------------------------
Episode 8: game finished.  (RL wins)
Game duration: 86 steps | Sum rewards: 85.000000 | Sum errors: 42.623444
------------------------------------
Episode 8: game finished.  (RL wins)
Game duration: 91 steps | Sum rewards: 90.000000 | Sum errors: 44.614811
------------------------------------
Episode 8: game finished.  (RL loses)
Game duration: 43 steps | Sum rewards: -42.000000 | Sum errors: 21.453814
------------------------------------
Episode 8: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.643700
------------------------------------
Episode 8: game finished.  (RL wins)
Game duration: 203 steps | Sum rewards: 202.000000 | Sum errors: 101.422699
------------------------------------
Episode 8: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 22.155283
------------------------------------
Episode 8: game finished.  (RL wins)
Game duration: 130 steps | Sum rewards: 129.000000 | Sum errors: 64.556549
------------------------------------
Episode 8: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.518169
------------------------------------
Episode 8: game finished.  (RL wins)
Game duration: 126 steps | Sum rewards: 125.000000 | Sum errors: 63.085018
------------------------------------
Episode 8: game finished.  (RL wins)
Game duration: 211 steps | Sum rewards: 210.000000 | Sum errors: 104.921616
------------------------------------
Episode 8: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.592014
------------------------------------
Episode 8: game finished.  (RL loses)
Game duration: 51 steps | Sum rewards: -50.000000 | Sum errors: 25.679869
------------------------------------
Episode 8: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.795891
------------------------------------
Episode 8: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.923773
------------------------------------
Episode 8: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 24.757950
------------------------------------
Episode 8: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.640434
------------------------------------
Episode 8: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.221453
------------------------------------
Episode 8: game finished.  (RL wins)
Game duration: 123 steps | Sum rewards: 122.000000 | Sum errors: 62.037086
------------------------------------
-----------------------------------------------
Episode 8 has finished, time to backpropagate.
Total reward was 760.000000 Running_reward: -783.103962 Mean_loss: 489.435852
-----------------------------------------------
Episode 9: game finished.  (RL wins)
Game duration: 128 steps | Sum rewards: 127.000000 | Sum errors: 63.455860
------------------------------------
Episode 9: game finished.  (RL wins)
Game duration: 83 steps | Sum rewards: 82.000000 | Sum errors: 41.611740
------------------------------------
Episode 9: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.017082
------------------------------------
Episode 9: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.177258
------------------------------------
Episode 9: game finished.  (RL wins)
Game duration: 128 steps | Sum rewards: 127.000000 | Sum errors: 63.487244
------------------------------------
Episode 9: game finished.  (RL wins)
Game duration: 130 steps | Sum rewards: 129.000000 | Sum errors: 64.564743
------------------------------------
Episode 9: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.992744
------------------------------------
Episode 9: game finished.  (RL loses)
Game duration: 51 steps | Sum rewards: -50.000000 | Sum errors: 25.507061
------------------------------------
Episode 9: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.845440
------------------------------------
Episode 9: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.480614
------------------------------------
Episode 9: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.769018
------------------------------------
Episode 9: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.410336
------------------------------------
Episode 9: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.121029
------------------------------------
Episode 9: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.650963
------------------------------------
Episode 9: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.897141
------------------------------------
Episode 9: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.974531
------------------------------------
Episode 9: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.759390
------------------------------------
Episode 9: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.539152
------------------------------------
Episode 9: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.385057
------------------------------------
Episode 9: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.446024
------------------------------------
Episode 9: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.509871
------------------------------------
Episode 9: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.730160
------------------------------------
-----------------------------------------------
Episode 9 has finished, time to backpropagate.
Total reward was -362.000000 Running_reward: -778.892922 Mean_loss: 325.412201
-----------------------------------------------
Episode 10: game finished.  (RL wins)
Game duration: 85 steps | Sum rewards: 84.000000 | Sum errors: 42.212181
------------------------------------
Episode 10: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.276016
------------------------------------
Episode 10: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.300816
------------------------------------
Episode 10: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.798595
------------------------------------
Episode 10: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 22.192472
------------------------------------
Episode 10: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.832428
------------------------------------
Episode 10: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.750082
------------------------------------
Episode 10: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 24.620180
------------------------------------
Episode 10: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.681381
------------------------------------
Episode 10: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.624960
------------------------------------
Episode 10: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.227045
------------------------------------
Episode 10: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.756048
------------------------------------
Episode 10: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.268673
------------------------------------
Episode 10: game finished.  (RL wins)
Game duration: 127 steps | Sum rewards: 126.000000 | Sum errors: 63.573738
------------------------------------
Episode 10: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.314249
------------------------------------
Episode 10: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.801737
------------------------------------
Episode 10: game finished.  (RL wins)
Game duration: 85 steps | Sum rewards: 84.000000 | Sum errors: 41.882431
------------------------------------
Episode 10: game finished.  (RL wins)
Game duration: 86 steps | Sum rewards: 85.000000 | Sum errors: 42.509270
------------------------------------
Episode 10: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.805412
------------------------------------
Episode 10: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.232689
------------------------------------
Episode 10: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.504677
------------------------------------
Episode 10: game finished.  (RL wins)
Game duration: 126 steps | Sum rewards: 125.000000 | Sum errors: 62.964973
------------------------------------
-----------------------------------------------
Episode 10 has finished, time to backpropagate.
Total reward was -278.000000 Running_reward: -773.883993 Mean_loss: 324.511902
-----------------------------------------------
Episode 11: game finished.  (RL wins)
Game duration: 83 steps | Sum rewards: 82.000000 | Sum errors: 41.505131
------------------------------------
Episode 11: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.732565
------------------------------------
Episode 11: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.405073
------------------------------------
Episode 11: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.425196
------------------------------------
Episode 11: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.389635
------------------------------------
Episode 11: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.557888
------------------------------------
Episode 11: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.413317
------------------------------------
Episode 11: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.972313
------------------------------------
Episode 11: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.801592
------------------------------------
Episode 11: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.467907
------------------------------------
Episode 11: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.302120
------------------------------------
Episode 11: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.641251
------------------------------------
Episode 11: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.560106
------------------------------------
Episode 11: game finished.  (RL loses)
Game duration: 43 steps | Sum rewards: -42.000000 | Sum errors: 21.403013
------------------------------------
Episode 11: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.990831
------------------------------------
Episode 11: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.353168
------------------------------------
Episode 11: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.727112
------------------------------------
Episode 11: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.775364
------------------------------------
Episode 11: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 24.994715
------------------------------------
Episode 11: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.205069
------------------------------------
Episode 11: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.728674
------------------------------------
-----------------------------------------------
Episode 11 has finished, time to backpropagate.
Total reward was -831.000000 Running_reward: -774.455153 Mean_loss: 256.089630
-----------------------------------------------
Episode 12: game finished.  (RL wins)
Game duration: 84 steps | Sum rewards: 83.000000 | Sum errors: 42.394855
------------------------------------
Episode 12: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 22.030281
------------------------------------
Episode 12: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 24.477442
------------------------------------
Episode 12: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.479902
------------------------------------
Episode 12: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.931940
------------------------------------
Episode 12: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.826614
------------------------------------
Episode 12: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.439049
------------------------------------
Episode 12: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.503084
------------------------------------
Episode 12: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.058121
------------------------------------
Episode 12: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.846769
------------------------------------
Episode 12: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.821163
------------------------------------
Episode 12: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.376360
------------------------------------
Episode 12: game finished.  (RL wins)
Game duration: 127 steps | Sum rewards: 126.000000 | Sum errors: 63.482140
------------------------------------
Episode 12: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.160065
------------------------------------
Episode 12: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 22.016157
------------------------------------
Episode 12: game finished.  (RL wins)
Game duration: 131 steps | Sum rewards: 130.000000 | Sum errors: 65.289658
------------------------------------
Episode 12: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.294037
------------------------------------
Episode 12: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.758099
------------------------------------
Episode 12: game finished.  (RL wins)
Game duration: 125 steps | Sum rewards: 124.000000 | Sum errors: 61.820671
------------------------------------
Episode 12: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.501459
------------------------------------
Episode 12: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.812037
------------------------------------
-----------------------------------------------
Episode 12 has finished, time to backpropagate.
Total reward was -315.000000 Running_reward: -769.860602 Mean_loss: 313.529602
-----------------------------------------------
Episode 13: game finished.  (RL wins)
Game duration: 88 steps | Sum rewards: 87.000000 | Sum errors: 43.725136
------------------------------------
Episode 13: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.154339
------------------------------------
Episode 13: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.109081
------------------------------------
Episode 13: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.454967
------------------------------------
Episode 13: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.953058
------------------------------------
Episode 13: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.683392
------------------------------------
Episode 13: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.850803
------------------------------------
Episode 13: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.526922
------------------------------------
Episode 13: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.922701
------------------------------------
Episode 13: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.623356
------------------------------------
Episode 13: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.602791
------------------------------------
Episode 13: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 25.039165
------------------------------------
Episode 13: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.698425
------------------------------------
Episode 13: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.736320
------------------------------------
Episode 13: game finished.  (RL loses)
Game duration: 51 steps | Sum rewards: -50.000000 | Sum errors: 25.444212
------------------------------------
Episode 13: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.789553
------------------------------------
Episode 13: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.368544
------------------------------------
Episode 13: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.331263
------------------------------------
Episode 13: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.847589
------------------------------------
Episode 13: game finished.  (RL loses)
Game duration: 51 steps | Sum rewards: -50.000000 | Sum errors: 25.345285
------------------------------------
Episode 13: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.055353
------------------------------------
-----------------------------------------------
Episode 13 has finished, time to backpropagate.
Total reward was -837.000000 Running_reward: -770.531996 Mean_loss: 255.265656
-----------------------------------------------
Episode 14: game finished.  (RL wins)
Game duration: 165 steps | Sum rewards: 164.000000 | Sum errors: 82.004898
------------------------------------
Episode 14: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.497650
------------------------------------
Episode 14: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.863760
------------------------------------
Episode 14: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.096117
------------------------------------
Episode 14: game finished.  (RL wins)
Game duration: 132 steps | Sum rewards: 131.000000 | Sum errors: 66.287491
------------------------------------
Episode 14: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.400209
------------------------------------
Episode 14: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.512070
------------------------------------
Episode 14: game finished.  (RL wins)
Game duration: 126 steps | Sum rewards: 125.000000 | Sum errors: 62.582714
------------------------------------
Episode 14: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.580271
------------------------------------
Episode 14: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.678013
------------------------------------
Episode 14: game finished.  (RL wins)
Game duration: 129 steps | Sum rewards: 128.000000 | Sum errors: 64.012955
------------------------------------
Episode 14: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 24.694092
------------------------------------
Episode 14: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.413282
------------------------------------
Episode 14: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.327801
------------------------------------
Episode 14: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.301004
------------------------------------
Episode 14: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.495796
------------------------------------
Episode 14: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.985935
------------------------------------
Episode 14: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.678600
------------------------------------
Episode 14: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.294142
------------------------------------
Episode 14: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.934574
------------------------------------
Episode 14: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.659431
------------------------------------
-----------------------------------------------
Episode 14 has finished, time to backpropagate.
Total reward was -239.000000 Running_reward: -765.216676 Mean_loss: 335.253448
-----------------------------------------------
Episode 15: game finished.  (RL wins)
Game duration: 90 steps | Sum rewards: 89.000000 | Sum errors: 45.005791
------------------------------------
Episode 15: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.806164
------------------------------------
Episode 15: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.930639
------------------------------------
Episode 15: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.075970
------------------------------------
Episode 15: game finished.  (RL wins)
Game duration: 129 steps | Sum rewards: 128.000000 | Sum errors: 64.546707
------------------------------------
Episode 15: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 25.239859
------------------------------------
Episode 15: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 22.086275
------------------------------------
Episode 15: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 25.098637
------------------------------------
Episode 15: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.417923
------------------------------------
Episode 15: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 25.247402
------------------------------------
Episode 15: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.384325
------------------------------------
Episode 15: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.443111
------------------------------------
Episode 15: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.243187
------------------------------------
Episode 15: game finished.  (RL wins)
Game duration: 127 steps | Sum rewards: 126.000000 | Sum errors: 63.071255
------------------------------------
Episode 15: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.186867
------------------------------------
Episode 15: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.391041
------------------------------------
Episode 15: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.854671
------------------------------------
Episode 15: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.232740
------------------------------------
Episode 15: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.126274
------------------------------------
Episode 15: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.015961
------------------------------------
Episode 15: game finished.  (RL loses)
Game duration: 43 steps | Sum rewards: -42.000000 | Sum errors: 21.243877
------------------------------------
-----------------------------------------------
Episode 15 has finished, time to backpropagate.
Total reward was -485.000000 Running_reward: -762.414509 Mean_loss: 298.228760
-----------------------------------------------
Episode 16: game finished.  (RL wins)
Game duration: 88 steps | Sum rewards: 87.000000 | Sum errors: 43.671532
------------------------------------
Episode 16: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.358805
------------------------------------
Episode 16: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.001511
------------------------------------
Episode 16: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.293392
------------------------------------
Episode 16: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.334276
------------------------------------
Episode 16: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.566841
------------------------------------
Episode 16: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.376951
------------------------------------
Episode 16: game finished.  (RL loses)
Game duration: 42 steps | Sum rewards: -41.000000 | Sum errors: 20.936064
------------------------------------
Episode 16: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.131329
------------------------------------
Episode 16: game finished.  (RL loses)
Game duration: 43 steps | Sum rewards: -42.000000 | Sum errors: 21.446552
------------------------------------
Episode 16: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.646927
------------------------------------
Episode 16: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.446587
------------------------------------
Episode 16: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.924397
------------------------------------
Episode 16: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.520420
------------------------------------
Episode 16: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.155254
------------------------------------
Episode 16: game finished.  (RL wins)
Game duration: 126 steps | Sum rewards: 125.000000 | Sum errors: 62.674713
------------------------------------
Episode 16: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.933302
------------------------------------
Episode 16: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.851574
------------------------------------
Episode 16: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.371033
------------------------------------
Episode 16: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.517797
------------------------------------
Episode 16: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.400442
------------------------------------
-----------------------------------------------
Episode 16 has finished, time to backpropagate.
Total reward was -648.000000 Running_reward: -761.270364 Mean_loss: 270.704590
-----------------------------------------------
Episode 17: game finished.  (RL wins)
Game duration: 82 steps | Sum rewards: 81.000000 | Sum errors: 40.936954
------------------------------------
Episode 17: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.443205
------------------------------------
Episode 17: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.659201
------------------------------------
Episode 17: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.138784
------------------------------------
Episode 17: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 24.829496
------------------------------------
Episode 17: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.896561
------------------------------------
Episode 17: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.321697
------------------------------------
Episode 17: game finished.  (RL loses)
Game duration: 51 steps | Sum rewards: -50.000000 | Sum errors: 25.353205
------------------------------------
Episode 17: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.814558
------------------------------------
Episode 17: game finished.  (RL wins)
Game duration: 127 steps | Sum rewards: 126.000000 | Sum errors: 62.909904
------------------------------------
Episode 17: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.223003
------------------------------------
Episode 17: game finished.  (RL wins)
Game duration: 84 steps | Sum rewards: 83.000000 | Sum errors: 41.902092
------------------------------------
Episode 17: game finished.  (RL wins)
Game duration: 84 steps | Sum rewards: 83.000000 | Sum errors: 41.780582
------------------------------------
Episode 17: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.324915
------------------------------------
Episode 17: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.378807
------------------------------------
Episode 17: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.867876
------------------------------------
Episode 17: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.435509
------------------------------------
Episode 17: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.482481
------------------------------------
Episode 17: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.194534
------------------------------------
Episode 17: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.591698
------------------------------------
Episode 17: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.019209
------------------------------------
Episode 17: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.795763
------------------------------------
-----------------------------------------------
Episode 17 has finished, time to backpropagate.
Total reward was -458.000000 Running_reward: -758.237660 Mean_loss: 303.830200
-----------------------------------------------
Episode 18: game finished.  (RL wins)
Game duration: 179 steps | Sum rewards: 178.000000 | Sum errors: 89.779861
------------------------------------
Episode 18: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.567371
------------------------------------
Episode 18: game finished.  (RL wins)
Game duration: 126 steps | Sum rewards: 125.000000 | Sum errors: 63.381622
------------------------------------
Episode 18: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.059711
------------------------------------
Episode 18: game finished.  (RL wins)
Game duration: 289 steps | Sum rewards: 288.000000 | Sum errors: 143.731628
------------------------------------
Episode 18: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.913052
------------------------------------
Episode 18: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.701736
------------------------------------
Episode 18: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.264977
------------------------------------
Episode 18: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 22.101845
------------------------------------
Episode 18: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.398651
------------------------------------
Episode 18: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.724264
------------------------------------
Episode 18: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.679424
------------------------------------
Episode 18: game finished.  (RL wins)
Game duration: 130 steps | Sum rewards: 129.000000 | Sum errors: 64.626717
------------------------------------
Episode 18: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.496788
------------------------------------
Episode 18: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.623251
------------------------------------
Episode 18: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.950262
------------------------------------
Episode 18: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.767519
------------------------------------
Episode 18: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.882082
------------------------------------
Episode 18: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.775711
------------------------------------
Episode 18: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 22.992220
------------------------------------
Episode 18: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.746269
------------------------------------
-----------------------------------------------
Episode 18 has finished, time to backpropagate.
Total reward was -55.000000 Running_reward: -751.205283 Mean_loss: 379.057251
-----------------------------------------------
Episode 19: game finished.  (RL wins)
Game duration: 86 steps | Sum rewards: 85.000000 | Sum errors: 42.682533
------------------------------------
Episode 19: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.795671
------------------------------------
Episode 19: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.512348
------------------------------------
Episode 19: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.547522
------------------------------------
Episode 19: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.390924
------------------------------------
Episode 19: game finished.  (RL wins)
Game duration: 126 steps | Sum rewards: 125.000000 | Sum errors: 63.589035
------------------------------------
Episode 19: game finished.  (RL wins)
Game duration: 128 steps | Sum rewards: 127.000000 | Sum errors: 64.473602
------------------------------------
Episode 19: game finished.  (RL loses)
Game duration: 51 steps | Sum rewards: -50.000000 | Sum errors: 25.673000
------------------------------------
Episode 19: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.796017
------------------------------------
Episode 19: game finished.  (RL wins)
Game duration: 127 steps | Sum rewards: 126.000000 | Sum errors: 62.680397
------------------------------------
Episode 19: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.955151
------------------------------------
Episode 19: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.398808
------------------------------------
Episode 19: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.952768
------------------------------------
Episode 19: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.047480
------------------------------------
Episode 19: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.659782
------------------------------------
Episode 19: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.523111
------------------------------------
Episode 19: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.300589
------------------------------------
Episode 19: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.063253
------------------------------------
Episode 19: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.931522
------------------------------------
Episode 19: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.054165
------------------------------------
Episode 19: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.554697
------------------------------------
-----------------------------------------------
Episode 19 has finished, time to backpropagate.
Total reward was -308.000000 Running_reward: -746.773231 Mean_loss: 313.358887
-----------------------------------------------
Episode 20: game finished.  (RL wins)
Game duration: 87 steps | Sum rewards: 86.000000 | Sum errors: 42.998322
------------------------------------
Episode 20: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.241093
------------------------------------
Episode 20: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.494411
------------------------------------
Episode 20: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.928276
------------------------------------
Episode 20: game finished.  (RL wins)
Game duration: 127 steps | Sum rewards: 126.000000 | Sum errors: 63.575485
------------------------------------
Episode 20: game finished.  (RL loses)
Game duration: 41 steps | Sum rewards: -40.000000 | Sum errors: 19.997942
------------------------------------
Episode 20: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.061417
------------------------------------
Episode 20: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.734491
------------------------------------
Episode 20: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.724445
------------------------------------
Episode 20: game finished.  (RL wins)
Game duration: 126 steps | Sum rewards: 125.000000 | Sum errors: 62.302235
------------------------------------
Episode 20: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 24.818956
------------------------------------
Episode 20: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.658478
------------------------------------
Episode 20: game finished.  (RL wins)
Game duration: 125 steps | Sum rewards: 124.000000 | Sum errors: 62.651829
------------------------------------
Episode 20: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.002039
------------------------------------
Episode 20: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.539118
------------------------------------
Episode 20: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.721306
------------------------------------
Episode 20: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.980255
------------------------------------
Episode 20: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.109449
------------------------------------
Episode 20: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.305441
------------------------------------
Episode 20: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.936224
------------------------------------
Episode 20: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.970945
------------------------------------
-----------------------------------------------
Episode 20 has finished, time to backpropagate.
Total reward was -318.000000 Running_reward: -742.485498 Mean_loss: 313.597473
-----------------------------------------------
Episode 21: game finished.  (RL wins)
Game duration: 167 steps | Sum rewards: 166.000000 | Sum errors: 83.339752
------------------------------------
Episode 21: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.213753
------------------------------------
Episode 21: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.159939
------------------------------------
Episode 21: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.517494
------------------------------------
Episode 21: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.074991
------------------------------------
Episode 21: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.474985
------------------------------------
Episode 21: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.834597
------------------------------------
Episode 21: game finished.  (RL wins)
Game duration: 205 steps | Sum rewards: 204.000000 | Sum errors: 101.900726
------------------------------------
Episode 21: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.544867
------------------------------------
Episode 21: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.759846
------------------------------------
Episode 21: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.366276
------------------------------------
Episode 21: game finished.  (RL wins)
Game duration: 131 steps | Sum rewards: 130.000000 | Sum errors: 65.043800
------------------------------------
Episode 21: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.955742
------------------------------------
Episode 21: game finished.  (RL wins)
Game duration: 126 steps | Sum rewards: 125.000000 | Sum errors: 62.714859
------------------------------------
Episode 21: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.921375
------------------------------------
Episode 21: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.953680
------------------------------------
Episode 21: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.209604
------------------------------------
Episode 21: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.311913
------------------------------------
Episode 21: game finished.  (RL wins)
Game duration: 212 steps | Sum rewards: 211.000000 | Sum errors: 105.677765
------------------------------------
Episode 21: game finished.  (RL wins)
Game duration: 209 steps | Sum rewards: 208.000000 | Sum errors: 104.032433
------------------------------------
Episode 21: game finished.  (RL wins)
Game duration: 131 steps | Sum rewards: 130.000000 | Sum errors: 64.726120
------------------------------------
-----------------------------------------------
Episode 21 has finished, time to backpropagate.
Total reward was 539.000000 Running_reward: -729.670643 Mean_loss: 453.336365
-----------------------------------------------
Episode 22: game finished.  (RL wins)
Game duration: 82 steps | Sum rewards: 81.000000 | Sum errors: 41.062222
------------------------------------
Episode 22: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.406261
------------------------------------
Episode 22: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 22.013258
------------------------------------
Episode 22: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.380579
------------------------------------
Episode 22: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.137337
------------------------------------
Episode 22: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.252714
------------------------------------
Episode 22: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.382954
------------------------------------
Episode 22: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.253031
------------------------------------
Episode 22: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.803783
------------------------------------
Episode 22: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.047470
------------------------------------
Episode 22: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.738173
------------------------------------
Episode 22: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.276249
------------------------------------
Episode 22: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.556034
------------------------------------
Episode 22: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.849798
------------------------------------
Episode 22: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 24.934359
------------------------------------
Episode 22: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.800476
------------------------------------
Episode 22: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.198492
------------------------------------
Episode 22: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.033546
------------------------------------
Episode 22: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.215620
------------------------------------
Episode 22: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.385918
------------------------------------
Episode 22: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.849096
------------------------------------
-----------------------------------------------
Episode 22 has finished, time to backpropagate.
Total reward was -828.000000 Running_reward: -730.653937 Mean_loss: 253.629120
-----------------------------------------------
Episode 23: game finished.  (RL wins)
Game duration: 165 steps | Sum rewards: 164.000000 | Sum errors: 82.201355
------------------------------------
Episode 23: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.184031
------------------------------------
Episode 23: game finished.  (RL loses)
Game duration: 43 steps | Sum rewards: -42.000000 | Sum errors: 21.345943
------------------------------------
Episode 23: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.348145
------------------------------------
Episode 23: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.452379
------------------------------------
Episode 23: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.098915
------------------------------------
Episode 23: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.165562
------------------------------------
Episode 23: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.697069
------------------------------------
Episode 23: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.947063
------------------------------------
Episode 23: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.329370
------------------------------------
Episode 23: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.269222
------------------------------------
Episode 23: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.433151
------------------------------------
Episode 23: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.999788
------------------------------------
Episode 23: game finished.  (RL wins)
Game duration: 128 steps | Sum rewards: 127.000000 | Sum errors: 63.590347
------------------------------------
Episode 23: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.341579
------------------------------------
Episode 23: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.648266
------------------------------------
Episode 23: game finished.  (RL wins)
Game duration: 123 steps | Sum rewards: 122.000000 | Sum errors: 61.772243
------------------------------------
Episode 23: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.999720
------------------------------------
Episode 23: game finished.  (RL loses)
Game duration: 43 steps | Sum rewards: -42.000000 | Sum errors: 21.282188
------------------------------------
Episode 23: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.442217
------------------------------------
Episode 23: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.878199
------------------------------------
-----------------------------------------------
Episode 23 has finished, time to backpropagate.
Total reward was -392.000000 Running_reward: -727.267398 Mean_loss: 308.804993
-----------------------------------------------
Episode 24: game finished.  (RL loses)
Game duration: 81 steps | Sum rewards: -1.000000 | Sum errors: 40.424957
------------------------------------
Episode 24: game finished.  (RL loses)
Game duration: 51 steps | Sum rewards: -50.000000 | Sum errors: 25.130341
------------------------------------
Episode 24: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.766117
------------------------------------
Episode 24: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 22.045176
------------------------------------
Episode 24: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.371401
------------------------------------
Episode 24: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.927967
------------------------------------
Episode 24: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.834356
------------------------------------
Episode 24: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.841824
------------------------------------
Episode 24: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.698603
------------------------------------
Episode 24: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 24.993673
------------------------------------
Episode 24: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 22.096718
------------------------------------
Episode 24: game finished.  (RL loses)
Game duration: 43 steps | Sum rewards: -42.000000 | Sum errors: 21.732174
------------------------------------
Episode 24: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.509102
------------------------------------
Episode 24: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.761198
------------------------------------
Episode 24: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 25.156206
------------------------------------
Episode 24: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.050394
------------------------------------
Episode 24: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.215816
------------------------------------
Episode 24: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.213699
------------------------------------
Episode 24: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.265503
------------------------------------
Episode 24: game finished.  (RL loses)
Game duration: 43 steps | Sum rewards: -42.000000 | Sum errors: 21.419636
------------------------------------
Episode 24: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.408100
------------------------------------
-----------------------------------------------
Episode 24 has finished, time to backpropagate.
Total reward was -917.000000 Running_reward: -729.164724 Mean_loss: 253.968063
-----------------------------------------------
Episode 25: game finished.  (RL wins)
Game duration: 87 steps | Sum rewards: 86.000000 | Sum errors: 43.004387
------------------------------------
Episode 25: game finished.  (RL wins)
Game duration: 128 steps | Sum rewards: 127.000000 | Sum errors: 64.389328
------------------------------------
Episode 25: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.827074
------------------------------------
Episode 25: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.085945
------------------------------------
Episode 25: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.480654
------------------------------------
Episode 25: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.799988
------------------------------------
Episode 25: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.471199
------------------------------------
Episode 25: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.761536
------------------------------------
Episode 25: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.698324
------------------------------------
Episode 25: game finished.  (RL wins)
Game duration: 130 steps | Sum rewards: 129.000000 | Sum errors: 64.983688
------------------------------------
Episode 25: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.977739
------------------------------------
Episode 25: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.544241
------------------------------------
Episode 25: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.527140
------------------------------------
Episode 25: game finished.  (RL wins)
Game duration: 201 steps | Sum rewards: 200.000000 | Sum errors: 99.912369
------------------------------------
Episode 25: game finished.  (RL wins)
Game duration: 124 steps | Sum rewards: 123.000000 | Sum errors: 61.642487
------------------------------------
Episode 25: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.931087
------------------------------------
Episode 25: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.229723
------------------------------------
Episode 25: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.729063
------------------------------------
Episode 25: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 24.833517
------------------------------------
Episode 25: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.604145
------------------------------------
Episode 25: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.261946
------------------------------------
-----------------------------------------------
Episode 25 has finished, time to backpropagate.
Total reward was -74.000000 Running_reward: -722.613076 Mean_loss: 355.059814
-----------------------------------------------
Episode 26: game finished.  (RL wins)
Game duration: 83 steps | Sum rewards: 82.000000 | Sum errors: 41.460873
------------------------------------
Episode 26: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.371059
------------------------------------
Episode 26: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.231693
------------------------------------
Episode 26: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 24.860764
------------------------------------
Episode 26: game finished.  (RL wins)
Game duration: 209 steps | Sum rewards: 208.000000 | Sum errors: 103.977722
------------------------------------
Episode 26: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.520098
------------------------------------
Episode 26: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.305269
------------------------------------
Episode 26: game finished.  (RL wins)
Game duration: 128 steps | Sum rewards: 127.000000 | Sum errors: 63.866997
------------------------------------
Episode 26: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.985695
------------------------------------
Episode 26: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.274944
------------------------------------
Episode 26: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.987076
------------------------------------
Episode 26: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.854080
------------------------------------
Episode 26: game finished.  (RL wins)
Game duration: 126 steps | Sum rewards: 125.000000 | Sum errors: 62.650356
------------------------------------
Episode 26: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 24.925125
------------------------------------
Episode 26: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.643440
------------------------------------
Episode 26: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.589176
------------------------------------
Episode 26: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.341270
------------------------------------
Episode 26: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.656910
------------------------------------
Episode 26: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.495729
------------------------------------
Episode 26: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.415373
------------------------------------
Episode 26: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.590902
------------------------------------
-----------------------------------------------
Episode 26 has finished, time to backpropagate.
Total reward was -246.000000 Running_reward: -717.846946 Mean_loss: 336.363281
-----------------------------------------------
Episode 27: game finished.  (RL wins)
Game duration: 89 steps | Sum rewards: 88.000000 | Sum errors: 43.766739
------------------------------------
Episode 27: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.953470
------------------------------------
Episode 27: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.955006
------------------------------------
Episode 27: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.926533
------------------------------------
Episode 27: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.580868
------------------------------------
Episode 27: game finished.  (RL loses)
Game duration: 43 steps | Sum rewards: -42.000000 | Sum errors: 21.408205
------------------------------------
Episode 27: game finished.  (RL loses)
Game duration: 51 steps | Sum rewards: -50.000000 | Sum errors: 25.238276
------------------------------------
Episode 27: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.566429
------------------------------------
Episode 27: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.319679
------------------------------------
Episode 27: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.538311
------------------------------------
Episode 27: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 24.972116
------------------------------------
Episode 27: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.220942
------------------------------------
Episode 27: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 25.052301
------------------------------------
Episode 27: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.174456
------------------------------------
Episode 27: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.744909
------------------------------------
Episode 27: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.852663
------------------------------------
Episode 27: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.941004
------------------------------------
Episode 27: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.797899
------------------------------------
Episode 27: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.838589
------------------------------------
Episode 27: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.940012
------------------------------------
Episode 27: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 25.147865
------------------------------------
-----------------------------------------------
Episode 27 has finished, time to backpropagate.
Total reward was -833.000000 Running_reward: -718.998476 Mean_loss: 250.003586
-----------------------------------------------
Episode 28: game finished.  (RL wins)
Game duration: 86 steps | Sum rewards: 85.000000 | Sum errors: 42.803986
------------------------------------
Episode 28: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.212923
------------------------------------
Episode 28: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.137049
------------------------------------
Episode 28: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.330345
------------------------------------
Episode 28: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.931955
------------------------------------
Episode 28: game finished.  (RL loses)
Game duration: 51 steps | Sum rewards: -50.000000 | Sum errors: 25.476778
------------------------------------
Episode 28: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.894995
------------------------------------
Episode 28: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.996374
------------------------------------
Episode 28: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.180378
------------------------------------
Episode 28: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.598740
------------------------------------
Episode 28: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.644703
------------------------------------
Episode 28: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.869633
------------------------------------
Episode 28: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.398973
------------------------------------
Episode 28: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.399191
------------------------------------
Episode 28: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.605066
------------------------------------
Episode 28: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.936117
------------------------------------
Episode 28: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.294367
------------------------------------
Episode 28: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 24.560898
------------------------------------
Episode 28: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.496248
------------------------------------
Episode 28: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.926651
------------------------------------
Episode 28: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.601614
------------------------------------
-----------------------------------------------
Episode 28 has finished, time to backpropagate.
Total reward was -837.000000 Running_reward: -720.178491 Mean_loss: 255.844513
-----------------------------------------------
Episode 29: game finished.  (RL wins)
Game duration: 85 steps | Sum rewards: 84.000000 | Sum errors: 41.788685
------------------------------------
Episode 29: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 24.686787
------------------------------------
Episode 29: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.193718
------------------------------------
Episode 29: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.071157
------------------------------------
Episode 29: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.696533
------------------------------------
Episode 29: game finished.  (RL wins)
Game duration: 130 steps | Sum rewards: 129.000000 | Sum errors: 65.103516
------------------------------------
Episode 29: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.595343
------------------------------------
Episode 29: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.166044
------------------------------------
Episode 29: game finished.  (RL loses)
Game duration: 51 steps | Sum rewards: -50.000000 | Sum errors: 25.356886
------------------------------------
Episode 29: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.665936
------------------------------------
Episode 29: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.390562
------------------------------------
Episode 29: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.091024
------------------------------------
Episode 29: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.088116
------------------------------------
Episode 29: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.639044
------------------------------------
Episode 29: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 24.926683
------------------------------------
Episode 29: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.846739
------------------------------------
Episode 29: game finished.  (RL loses)
Game duration: 42 steps | Sum rewards: -41.000000 | Sum errors: 20.602097
------------------------------------
Episode 29: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.059464
------------------------------------
Episode 29: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.408274
------------------------------------
Episode 29: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.344812
------------------------------------
Episode 29: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 22.128872
------------------------------------
-----------------------------------------------
Episode 29 has finished, time to backpropagate.
Total reward was -653.000000 Running_reward: -719.506706 Mean_loss: 272.648712
-----------------------------------------------
Episode 30: game finished.  (RL wins)
Game duration: 89 steps | Sum rewards: 88.000000 | Sum errors: 44.161579
------------------------------------
Episode 30: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.047995
------------------------------------
Episode 30: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.461727
------------------------------------
Episode 30: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.948559
------------------------------------
Episode 30: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.543573
------------------------------------
Episode 30: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.434099
------------------------------------
Episode 30: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.399717
------------------------------------
Episode 30: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.939722
------------------------------------
Episode 30: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.082514
------------------------------------
Episode 30: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.634909
------------------------------------
Episode 30: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.759607
------------------------------------
Episode 30: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.107449
------------------------------------
Episode 30: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.952690
------------------------------------
Episode 30: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.525541
------------------------------------
Episode 30: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.810974
------------------------------------
Episode 30: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.827320
------------------------------------
Episode 30: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.466324
------------------------------------
Episode 30: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.618614
------------------------------------
Episode 30: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.246685
------------------------------------
Episode 30: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.849007
------------------------------------
Episode 30: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.334259
------------------------------------
-----------------------------------------------
Episode 30 has finished, time to backpropagate.
Total reward was -818.000000 Running_reward: -720.491639 Mean_loss: 250.560242
-----------------------------------------------
Episode 31: game finished.  (RL wins)
Game duration: 87 steps | Sum rewards: 86.000000 | Sum errors: 43.408756
------------------------------------
Episode 31: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.566587
------------------------------------
Episode 31: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.225189
------------------------------------
Episode 31: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.247427
------------------------------------
Episode 31: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.462831
------------------------------------
Episode 31: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.634279
------------------------------------
Episode 31: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.347439
------------------------------------
Episode 31: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.542547
------------------------------------
Episode 31: game finished.  (RL wins)
Game duration: 129 steps | Sum rewards: 128.000000 | Sum errors: 64.709885
------------------------------------
Episode 31: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.266685
------------------------------------
Episode 31: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.710623
------------------------------------
Episode 31: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.504250
------------------------------------
Episode 31: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 24.687620
------------------------------------
Episode 31: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.326525
------------------------------------
Episode 31: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.342823
------------------------------------
Episode 31: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.292297
------------------------------------
Episode 31: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.849842
------------------------------------
Episode 31: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.529652
------------------------------------
Episode 31: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.883749
------------------------------------
Episode 31: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.406586
------------------------------------
Episode 31: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.567785
------------------------------------
-----------------------------------------------
Episode 31 has finished, time to backpropagate.
Total reward was -659.000000 Running_reward: -719.876723 Mean_loss: 277.439117
-----------------------------------------------
Episode 32: game finished.  (RL wins)
Game duration: 131 steps | Sum rewards: 130.000000 | Sum errors: 65.267181
------------------------------------
Episode 32: game finished.  (RL wins)
Game duration: 90 steps | Sum rewards: 89.000000 | Sum errors: 44.199574
------------------------------------
Episode 32: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.477694
------------------------------------
Episode 32: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.211050
------------------------------------
Episode 32: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.795301
------------------------------------
Episode 32: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.398853
------------------------------------
Episode 32: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.336884
------------------------------------
Episode 32: game finished.  (RL wins)
Game duration: 126 steps | Sum rewards: 125.000000 | Sum errors: 62.853642
------------------------------------
Episode 32: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.393188
------------------------------------
Episode 32: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.551310
------------------------------------
Episode 32: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.138330
------------------------------------
Episode 32: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.208918
------------------------------------
Episode 32: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 22.108883
------------------------------------
Episode 32: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.246885
------------------------------------
Episode 32: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.559042
------------------------------------
Episode 32: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.396585
------------------------------------
Episode 32: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.521725
------------------------------------
Episode 32: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.560822
------------------------------------
Episode 32: game finished.  (RL wins)
Game duration: 127 steps | Sum rewards: 126.000000 | Sum errors: 63.499825
------------------------------------
Episode 32: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.255037
------------------------------------
Episode 32: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.944191
------------------------------------
Episode 32: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.443884
------------------------------------
-----------------------------------------------
Episode 32 has finished, time to backpropagate.
Total reward was -353.000000 Running_reward: -716.207956 Mean_loss: 326.236389
-----------------------------------------------
Episode 33: game finished.  (RL wins)
Game duration: 86 steps | Sum rewards: 85.000000 | Sum errors: 42.543957
------------------------------------
Episode 33: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.199802
------------------------------------
Episode 33: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.944832
------------------------------------
Episode 33: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.949238
------------------------------------
Episode 33: game finished.  (RL loses)
Game duration: 43 steps | Sum rewards: -42.000000 | Sum errors: 21.548561
------------------------------------
Episode 33: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.943714
------------------------------------
Episode 33: game finished.  (RL wins)
Game duration: 128 steps | Sum rewards: 127.000000 | Sum errors: 64.005783
------------------------------------
Episode 33: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 22.026009
------------------------------------
Episode 33: game finished.  (RL loses)
Game duration: 43 steps | Sum rewards: -42.000000 | Sum errors: 21.346724
------------------------------------
Episode 33: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.856655
------------------------------------
Episode 33: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.279730
------------------------------------
Episode 33: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.415390
------------------------------------
Episode 33: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.735163
------------------------------------
Episode 33: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.757582
------------------------------------
Episode 33: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.643744
------------------------------------
Episode 33: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.110157
------------------------------------
Episode 33: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.555099
------------------------------------
Episode 33: game finished.  (RL wins)
Game duration: 210 steps | Sum rewards: 209.000000 | Sum errors: 105.236320
------------------------------------
Episode 33: game finished.  (RL wins)
Game duration: 83 steps | Sum rewards: 82.000000 | Sum errors: 41.166813
------------------------------------
Episode 33: game finished.  (RL wins)
Game duration: 85 steps | Sum rewards: 84.000000 | Sum errors: 42.166908
------------------------------------
Episode 33: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.276091
------------------------------------
Episode 33: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.477402
------------------------------------
-----------------------------------------------
Episode 33 has finished, time to backpropagate.
Total reward was -178.000000 Running_reward: -710.825876 Mean_loss: 345.620697
-----------------------------------------------
Episode 34: game finished.  (RL wins)
Game duration: 86 steps | Sum rewards: 85.000000 | Sum errors: 42.931141
------------------------------------
Episode 34: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.767426
------------------------------------
Episode 34: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.485008
------------------------------------
Episode 34: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.250853
------------------------------------
Episode 34: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.062263
------------------------------------
Episode 34: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.725082
------------------------------------
Episode 34: game finished.  (RL wins)
Game duration: 127 steps | Sum rewards: 126.000000 | Sum errors: 63.205650
------------------------------------
Episode 34: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.018471
------------------------------------
Episode 34: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.676434
------------------------------------
Episode 34: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.855659
------------------------------------
Episode 34: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.337889
------------------------------------
Episode 34: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.038939
------------------------------------
Episode 34: game finished.  (RL wins)
Game duration: 130 steps | Sum rewards: 129.000000 | Sum errors: 64.335205
------------------------------------
Episode 34: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 22.032372
------------------------------------
Episode 34: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.080343
------------------------------------
Episode 34: game finished.  (RL wins)
Game duration: 91 steps | Sum rewards: 90.000000 | Sum errors: 45.361725
------------------------------------
Episode 34: game finished.  (RL wins)
Game duration: 85 steps | Sum rewards: 84.000000 | Sum errors: 42.432144
------------------------------------
Episode 34: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.167496
------------------------------------
Episode 34: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.817099
------------------------------------
Episode 34: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.691620
------------------------------------
Episode 34: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.860313
------------------------------------
Episode 34: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 22.056879
------------------------------------
-----------------------------------------------
Episode 34 has finished, time to backpropagate.
Total reward was -258.000000 Running_reward: -706.297617 Mean_loss: 323.147491
-----------------------------------------------
Episode 35: game finished.  (RL wins)
Game duration: 87 steps | Sum rewards: 86.000000 | Sum errors: 43.410824
------------------------------------
Episode 35: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 24.558739
------------------------------------
Episode 35: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.657467
------------------------------------
Episode 35: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.924480
------------------------------------
Episode 35: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.710760
------------------------------------
Episode 35: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.166155
------------------------------------
Episode 35: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.246775
------------------------------------
Episode 35: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.881502
------------------------------------
Episode 35: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.560453
------------------------------------
Episode 35: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.418184
------------------------------------
Episode 35: game finished.  (RL wins)
Game duration: 129 steps | Sum rewards: 128.000000 | Sum errors: 64.632278
------------------------------------
Episode 35: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.433691
------------------------------------
Episode 35: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.534403
------------------------------------
Episode 35: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.757273
------------------------------------
Episode 35: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.944252
------------------------------------
Episode 35: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.620754
------------------------------------
Episode 35: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.599972
------------------------------------
Episode 35: game finished.  (RL loses)
Game duration: 43 steps | Sum rewards: -42.000000 | Sum errors: 21.679800
------------------------------------
Episode 35: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.419067
------------------------------------
Episode 35: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.528240
------------------------------------
Episode 35: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.220938
------------------------------------
-----------------------------------------------
Episode 35 has finished, time to backpropagate.
Total reward was -653.000000 Running_reward: -705.764641 Mean_loss: 276.092834
-----------------------------------------------
Episode 36: game finished.  (RL loses)
Game duration: 81 steps | Sum rewards: -1.000000 | Sum errors: 40.128674
------------------------------------
Episode 36: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.297388
------------------------------------
Episode 36: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.926521
------------------------------------
Episode 36: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.508629
------------------------------------
Episode 36: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.423769
------------------------------------
Episode 36: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.072962
------------------------------------
Episode 36: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.898241
------------------------------------
Episode 36: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.624340
------------------------------------
Episode 36: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.273983
------------------------------------
Episode 36: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.232664
------------------------------------
Episode 36: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.704182
------------------------------------
Episode 36: game finished.  (RL wins)
Game duration: 121 steps | Sum rewards: 120.000000 | Sum errors: 60.205086
------------------------------------
Episode 36: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.374794
------------------------------------
Episode 36: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.222942
------------------------------------
Episode 36: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.140320
------------------------------------
Episode 36: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.630001
------------------------------------
Episode 36: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.499453
------------------------------------
Episode 36: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.530561
------------------------------------
Episode 36: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.095142
------------------------------------
Episode 36: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.778744
------------------------------------
Episode 36: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.924726
------------------------------------
-----------------------------------------------
Episode 36 has finished, time to backpropagate.
Total reward was -745.000000 Running_reward: -706.156995 Mean_loss: 268.319092
-----------------------------------------------
Episode 37: game finished.  (RL wins)
Game duration: 85 steps | Sum rewards: 84.000000 | Sum errors: 42.253090
------------------------------------
Episode 37: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.222748
------------------------------------
Episode 37: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.791479
------------------------------------
Episode 37: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.741587
------------------------------------
Episode 37: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.678459
------------------------------------
Episode 37: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.291447
------------------------------------
Episode 37: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.174355
------------------------------------
Episode 37: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.003063
------------------------------------
Episode 37: game finished.  (RL wins)
Game duration: 132 steps | Sum rewards: 131.000000 | Sum errors: 66.001633
------------------------------------
Episode 37: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.580336
------------------------------------
Episode 37: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.830250
------------------------------------
Episode 37: game finished.  (RL wins)
Game duration: 121 steps | Sum rewards: 120.000000 | Sum errors: 59.787422
------------------------------------
Episode 37: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.060114
------------------------------------
Episode 37: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.476347
------------------------------------
Episode 37: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.932034
------------------------------------
Episode 37: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.227325
------------------------------------
Episode 37: game finished.  (RL wins)
Game duration: 126 steps | Sum rewards: 125.000000 | Sum errors: 62.741886
------------------------------------
Episode 37: game finished.  (RL wins)
Game duration: 129 steps | Sum rewards: 128.000000 | Sum errors: 64.799683
------------------------------------
Episode 37: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 23.947855
------------------------------------
Episode 37: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.941151
------------------------------------
Episode 37: game finished.  (RL wins)
Game duration: 133 steps | Sum rewards: 132.000000 | Sum errors: 66.434868
------------------------------------
-----------------------------------------------
Episode 37 has finished, time to backpropagate.
Total reward was 37.000000 Running_reward: -698.725425 Mean_loss: 355.391418
-----------------------------------------------
Episode 38: game finished.  (RL wins)
Game duration: 91 steps | Sum rewards: 90.000000 | Sum errors: 45.223900
------------------------------------
Episode 38: game finished.  (RL loses)
Game duration: 43 steps | Sum rewards: -42.000000 | Sum errors: 21.549694
------------------------------------
Episode 38: game finished.  (RL wins)
Game duration: 370 steps | Sum rewards: 369.000000 | Sum errors: 184.889252
------------------------------------
Episode 38: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.472555
------------------------------------
Episode 38: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 25.238743
------------------------------------
Episode 38: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.219042
------------------------------------
Episode 38: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 22.069704
------------------------------------
Episode 38: game finished.  (RL wins)
Game duration: 126 steps | Sum rewards: 125.000000 | Sum errors: 62.637062
------------------------------------
Episode 38: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.160406
------------------------------------
Episode 38: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 24.979136
------------------------------------
Episode 38: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.413969
------------------------------------
Episode 38: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.979652
------------------------------------
Episode 38: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.268328
------------------------------------
Episode 38: game finished.  (RL wins)
Game duration: 133 steps | Sum rewards: 132.000000 | Sum errors: 65.492111
------------------------------------
Episode 38: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.021362
------------------------------------
Episode 38: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 22.964750
------------------------------------
Episode 38: game finished.  (RL wins)
Game duration: 123 steps | Sum rewards: 122.000000 | Sum errors: 61.270538
------------------------------------
Episode 38: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.052639
------------------------------------
Episode 38: game finished.  (RL wins)
Game duration: 129 steps | Sum rewards: 128.000000 | Sum errors: 64.004707
------------------------------------
Episode 38: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.560789
------------------------------------
Episode 38: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.044809
------------------------------------
-----------------------------------------------
Episode 38 has finished, time to backpropagate.
Total reward was 282.000000 Running_reward: -688.918171 Mean_loss: 414.374939
-----------------------------------------------
Episode 39: game finished.  (RL wins)
Game duration: 88 steps | Sum rewards: 87.000000 | Sum errors: 44.042332
------------------------------------
Episode 39: game finished.  (RL wins)
Game duration: 128 steps | Sum rewards: 127.000000 | Sum errors: 64.047050
------------------------------------
Episode 39: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.558523
------------------------------------
Episode 39: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.273388
------------------------------------
Episode 39: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.151747
------------------------------------
Episode 39: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.623749
------------------------------------
Episode 39: game finished.  (RL loses)
Game duration: 48 steps | Sum rewards: -47.000000 | Sum errors: 24.236732
------------------------------------
Episode 39: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.925293
------------------------------------
Episode 39: game finished.  (RL loses)
Game duration: 49 steps | Sum rewards: -48.000000 | Sum errors: 24.350193
------------------------------------
Episode 39: game finished.  (RL wins)
Game duration: 131 steps | Sum rewards: 130.000000 | Sum errors: 65.413040
------------------------------------
Episode 39: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 22.627125
------------------------------------
Episode 39: game finished.  (RL wins)
Game duration: 127 steps | Sum rewards: 126.000000 | Sum errors: 62.647881
------------------------------------
Episode 39: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.702129
------------------------------------
Episode 39: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.896450
------------------------------------
Episode 39: game finished.  (RL loses)
Game duration: 50 steps | Sum rewards: -49.000000 | Sum errors: 24.918119
------------------------------------
Episode 39: game finished.  (RL loses)
Game duration: 46 steps | Sum rewards: -45.000000 | Sum errors: 23.358160
------------------------------------
Episode 39: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.277214
------------------------------------
Episode 39: game finished.  (RL loses)
Game duration: 44 steps | Sum rewards: -43.000000 | Sum errors: 21.733173
------------------------------------
Episode 39: game finished.  (RL loses)
Game duration: 45 steps | Sum rewards: -44.000000 | Sum errors: 22.461887
------------------------------------
Episode 39: game finished.  (RL loses)
Game duration: 47 steps | Sum rewards: -46.000000 | Sum errors: 23.031422
------------------------------------
Episode 39: game finished.  (RL wins)
Game duration: 127 steps | Sum rewards: 126.000000 | Sum errors: 62.949379
------------------------------------
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-20-45451aedc503> in <module>()
     70         rewards = np.vstack(rewards)
     71 
---> 72         network.policy_backward(losses_op, dlogps, rewards)
     73         mean_loss = np.sum([x * x for x in dlogps])
     74         running_reward = reward_sum if running_reward is None else running_reward * 0.99 + reward_sum * 0.01

<ipython-input-18-6c85b6f7855e> in policy_backward(self, losses_op, episode_dlogps, episode_rewards)
     54             ad = Autodiff(op_tree=losses_op[i]*be.array(episode_dlogps[i]), be = be, next_error=None)
     55             # compute gradients and assign them to self.dw1 and self.dw2
---> 56             ad.back_prop_grad([self.ll['W2'], self.ll['W1']], [self.dW2, self.dW1])
     57             # weights update:
     58             self.ll['W2'][:] = self.ll['W2'].get() -self.learning_rate *self.dW2.get()/len(losses_op)

/home/konsang/neon/neon/backends/autodiff.py in back_prop_grad(self, tensors, gradients)
    417             else:
    418                 grad_buffer[:] = self.map_tensor_grad_op_tree.get(
--> 419                     tensor._original_base, grad_buffer * 0.)
    420 
    421         if skipped_tensor:

/home/konsang/neon/neon/backends/nervanacpu.py in __setitem__(self, key, value)
    160         """
    161 
--> 162         self.__getitem__(key)._assign(value)
    163         return self
    164 

/home/konsang/neon/neon/backends/nervanacpu.py in _assign(self, value)
    222         """
    223         if isinstance(value, (CPUTensor, OpTreeNode)):
--> 224             OpTreeNode.build("assign", self, value)
    225         elif isinstance(value, (int, float, np.ndarray)):
    226             self.set(value)

/home/konsang/neon/neon/backends/backend.py in build(op, a, b, out, **kwargs)
   1840         # execute explicit assignment
   1841         if op == "assign":
-> 1842             return node.execute()
   1843 
   1844         # passing in an out value counts as assignment

/home/konsang/neon/neon/backends/backend.py in execute(self)
   1861 
   1862         if isinstance(backend, Backend):
-> 1863             return backend.execute(self)
   1864         else:
   1865             raise NotImplementedError()

/home/konsang/neon/neon/backends/nervanacpu.py in execute(self, optree, numpy_call_dict)
    698                     right = compute_stack.pop()
    699                     left = compute_stack.pop()
--> 700                     compute_stack.append(numpy_call_dict[p['op']](left, right))
    701                 elif p['op'] in OpCollection.reduction_ops:
    702                     left = compute_stack.pop()

/home/konsang/neon/neon/backends/nervanacpu.py in <lambda>(left, right)
    512     "minimum": lambda left, right: np.minimum(left, right),
    513     "maximum": lambda left, right: np.maximum(left, right),
--> 514     "dot": lambda left, right: np.dot(left, right),
    515     # reduction ops
    516     "sum": lambda op_dict, left: np.sum(left, axis=op_dict['axis'], keepdims=True),

KeyboardInterrupt: 

In [ ]: